from app.scripts.ocr_layout_task import do_process


def parse_ocr_layout_pdf(input_path: str, save_dir: str, config: dict) -> dict:
    """
    Run layout analysis and OCR on a PDF file.

    :param input_path: full path of the input file
    :param save_dir: save path, forwarded to ``do_process``
    :param config: configuration, forwarded to ``do_process``
    :return: dict with two keys: ``contents`` (the first value returned by
        ``do_process``, passed through unchanged) and ``figures`` (a flat
        list of figure descriptor dicts)
    """
    def img_gen(figure, sv_format='JPEG'):
        # Re-key a single figure record into the descriptor shape handed
        # back to callers; the image format tag defaults to JPEG.
        return {
            "fpath_name": figure['name'],
            "file": figure['image'],
            "label": figure['label'],
            "type": sv_format
        }

    contents, figure_groups = do_process(
        config=config, file_path=input_path, save_dir=save_dir
    )
    # do_process returns figures as nested groups (presumably one group per
    # page -- TODO confirm); flatten them into a single list of descriptors.
    figures = [img_gen(item) for group in figure_groups for item in group]
    return {
        'contents': contents,
        'figures': figures,
    }




